# R script generates functions used in analyze-conjoint_data.R to estimate quantities of interest for conjoint analysis
# 'plotDifnDif': plot interactions for main sample
# 'plotDifnDif2': plot interactions for subgroups
# 'DifnDif3': estimate DiD to export for table
###################################################################################################

rm(list = ls())

library (sandwich)
library (mvtnorm)


# this function plots DiD for full sample.
plotDifnDif <- function (a="welfare", b="mrtg.crdt", subsetVar="none", subsetValue="none") {
  labels.a <- levels (surveyData[,grep(paste0("^",a,"$"), colnames(surveyData))])
  labels.b <- levels (surveyData[,grep(paste0("^",b,"$"), colnames(surveyData))])
  labels.a <- labels.a[-2]
  labels.b <- labels.b[-2]
  dataMat <- data.frame (Y=surveyData$forcedChoice
    , X1=surveyData[,grep(paste0("^",a,"$"), colnames(surveyData))]
    , X2=surveyData[,grep(paste0("^",b,"$"), colnames(surveyData))])
  if (subsetVar=="none") {
    model <- lm(Y ~ X1*X2, data=dataMat)
  } else {
    model <- lm(Y ~  X1*X2
      , data=subset (dataMat, surveyData[,grep(paste0("^",subsetVar,"$"), colnames(surveyData))] == subsetValue))
  }
  scans <- rmvnorm (10000, mean=coef(model), sigma=vcov(model))
  lolo <- scans %*% c(1,0,0,0,0,0,0,0,0)
  lohi <- scans %*% c(1,0,0,0,1,0,0,0,0)
  hilo <- scans %*% c(1,0,1,0,0,0,0,0,0)
  hihi <- scans %*% c(1,0,1,0,1,0,0,0,1)
  loDif <- lohi - lolo
  hiDif <- hihi - hilo
  DifnDif <- hiDif - loDif
  which.sign <- sign (mean (DifnDif))
  if (which.sign ==-1) {
    p.val <- length (DifnDif[DifnDif>0])/1000
  } else {
    p.val <- length (DifnDif[DifnDif<0])/1000
  }
  par (mfrow=c(1,1), mar=c(4.5,3,0,0), oma=c(0,0,0,0), las=0)
  plot (c(round (min (min(loDif),min(hiDif)), 1), round (max (max(loDif),max(hiDif)), 1)),c(0.1,0.9)
    , type="n", axes=F
    , xlab=""
    , ylab="")
  axis (1)
  # axis (2, at=c(0.1,0.9), labels=FALSE, tick=TRUE)
  mtext (paste0("Diff in E(Y) from high ", b, " to low ", b)
    , side=1
    , line=3, cex=1.3)
  mtext (paste0("Evaluated at two levels of ", a)
    , side=2
    , line=1.5, cex=1.3)
  # abline (v=0, lty=3)
  points (xy.coords(mean(loDif), 0.35), pch=19, cex=1.3)
  segments (x0=quantile(loDif, 0.083), x1=quantile(loDif, 0.917), y0=0.35, y1=0.35, lwd=3)
  points (xy.coords(mean(hiDif), 0.65), pch=19, cex=1.3)
  segments (x0=quantile(hiDif, 0.083), x1=quantile(hiDif, 0.917), y0=0.65, y1=0.65, lwd=3)
  par(las=1)
  mtext(text=c(labels.a[1], labels.a[2]), at=c(0.35, 0.65)
    , side=2
    , line=-1, cex=1.3)
  # text(xy.coords (c(mean(loDif), mean(hiDif)), c(0.35, 0.65))
  #      , labels=c(paste(a, labels.a[1], sep=":"), paste(a, labels.a[2], sep=":"))
  #      , pos=3)
  legend ("bottomleft"
    , legend=paste0("p-value for difference in differences is ", round(p.val, 2))
    , bty="n", cex=1.3)
}


# this function plots DiD for subgroups defined by user.
plotDifnDif2 <- function (a="welfare", b="mrtg.crdt", subsetVar="income.cat", subsetValue=c("Low Income","High Income")) {
  allVars <- c("mrtg.crdt","consum.crdt","welfare","unempl","tax")
  labels.a <- levels(surveyData[, a])
  labels.b <- levels(surveyData[, b])
  labels.a <- labels.a[-2]
  labels.b <- labels.b[-2]
  clusterVar <- surveyData$ResponseId
  Y=surveyData$forcedChoice
  X1=surveyData[,grep(paste0("^",a,"$"), colnames(surveyData))]
  X2=surveyData[,grep(paste0("^",b,"$"), colnames(surveyData))]
  Z =data.frame(surveyData[,is.element (colnames(surveyData), allVars[intersect (which(allVars != b), which (allVars != a))])])
  dataMat <- data.frame (Y,X1,X2,Z,clusterVar)
  dataMat1 <- subset (dataMat, surveyData[,grep(subsetVar, colnames(surveyData))] == subsetValue[1])
  dataMat2 <- subset (dataMat, surveyData[,grep(subsetVar, colnames(surveyData))] == subsetValue[2])
  model1 <- lm(Y ~ X1*X2 + ., data=dataMat1[,-grep("clusterVar",colnames(dataMat1))])
  model2 <- lm(Y ~ X1*X2 + ., data=dataMat2[,-grep("clusterVar",colnames(dataMat2))])
  cl.vcov1 <- vcovCL (model1, cluster=dataMat1[,grep("clusterVar",colnames(dataMat1))])
  cl.vcov2 <- vcovCL (model2, cluster=dataMat2[,grep("clusterVar",colnames(dataMat2))])
  scans1 <- rmvnorm (1000, mean=coef(model1), sigma=cl.vcov1)
  scans2 <- rmvnorm (1000, mean=coef(model2), sigma=cl.vcov2)
  lolo1 <- scans1 %*% c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
  lohi1 <- scans1 %*% c(1,0,0,0,1,0,0,0,0,0,0,0,0,0,0)
  hilo1 <- scans1 %*% c(1,0,1,0,0,0,0,0,0,0,0,0,0,0,0)
  hihi1 <- scans1 %*% c(1,0,1,0,1,0,0,0,0,0,0,0,0,0,1)
  loDif1 <- lohi1 - lolo1
  hiDif1 <- hihi1 - hilo1
  lolo2 <- scans2 %*% c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
  lohi2 <- scans2 %*% c(1,0,0,0,1,0,0,0,0,0,0,0,0,0,0)
  hilo2 <- scans2 %*% c(1,0,1,0,0,0,0,0,0,0,0,0,0,0,0)
  hihi2 <- scans2 %*% c(1,0,1,0,1,0,0,0,0,0,0,0,0,0,1)
  loDif2 <- lohi2 - lolo2
  hiDif2 <- hihi2 - hilo2
  diff.amce.1 <- sample (hiDif1) - sample(loDif1)
  diff.amce.2 <- sample (hiDif2) - sample(loDif2)
  which.sign.1 <- sign (mean (diff.amce.1))
  which.sign.2 <- sign (mean (diff.amce.2))
  if (which.sign.1 ==-1) {
    p.val.1 <- length (diff.amce.1[diff.amce.1>0])/1000
  } else {
    p.val.1 <- length (diff.amce.1[diff.amce.1<0])/1000
  }
  if (which.sign.2 ==-1) {
    p.val.2 <- length (diff.amce.2[diff.amce.2>0])/1000
  } else {
    p.val.2 <- length (diff.amce.2[diff.amce.2<0])/1000
  }
  par (mfrow=c(1,1), mar=c(4,5,0,0), oma=c(0,0,0,0), las=1)
  plot (c(round (min (min(loDif1),min(hiDif1),min(loDif2),min(hiDif1)),2), round (max (max(loDif1),max(hiDif1),max(loDif2),max(hiDif2)), 1)),c(0.1,0.9), type="n", axes=F
    , xlab=""
    , ylab="")
  axis (1)
  mtext (paste0("Difference in AMCE from high ", b, " to low ", b)
    , side=1
    , line=3, cex=1.3)
  par (las=0)
  mtext (paste0("Evaluated at two levels of ", a)
    , side=2
    , line=1.5, cex=1.3)
  abline (v=0, lty=2)
  points (xy.coords(mean(loDif1), 0.37), pch=19, cex=1.3, col="#F8766D")
  segments (x0=quantile(loDif1, 0.083), x1=quantile(loDif1, 0.917), y0=0.37, y1=0.37, lwd=3, col="#F8766D")
  points (xy.coords(mean(loDif2), 0.33), pch=19, cex=1.3, col="#00BFC4")
  segments (x0=quantile(loDif2, 0.083), x1=quantile(loDif2, 0.917), y0=0.33, y1=0.33, lwd=3, col="#00BFC4")
  points (xy.coords(mean(hiDif1), 0.67), pch=19, cex=1.3, col="#F8766D")
  segments (x0=quantile(hiDif1, 0.083), x1=quantile(hiDif1, 0.917), y0=0.67, y1=0.67, lwd=3, col="#F8766D")
  points (xy.coords(mean(hiDif2), 0.63), pch=19, cex=1.3, col="#00BFC4")
  segments (x0=quantile(hiDif2, 0.083), x1=quantile(hiDif2, 0.917), y0=0.63, y1=0.63, lwd=3, col="#00BFC4")
  text(xy.coords (c(mean(loDif1), mean(hiDif1)), c(0.27, 0.68))
    , labels=c(paste(a, labels.a[1], sep=": "), paste(a, labels.a[2], sep=":"))
    , pos=3)
  segments (x0=mean(loDif2), x1=mean(hiDif2), y0=0.33, y1=0.63, lty=3)
  segments (x0=mean(loDif1), x1=mean(hiDif1), y0=0.37, y1=0.67, lty=3)
  par(las=1)
  mtext(text=c(labels.a[1], labels.a[2]), at=c(0.35, 0.65)
    , side=2
    , line=-1, cex=1.3)
  text(xy.coords (c((mean(loDif1)+mean(hiDif1))/2, (mean(loDif2)+mean(hiDif2))/2), c(0.52, 0.48))
    , labels=c(paste("p", format(round (p.val.1,2), nsmall=2), sep=": "), paste("p", format(round(p.val.2,2), nsmall=2), sep=": "))
    , pos=1)
  legend ("topleft"
    , legend=subsetValue
    , pch=19
    , col=c("#F8766D","#00BFC4")
    , bty="n", cex=1.3)
}


# DifnDif3 produces a diff-in-diff coefficient, with attendant standard error, for a single subset
# estimates for subsets (note: by default, estimates for low-income respondents)
DifnDif3_sub <- function (a="welfare", b="mrtg.crdt", subsetVar="income.cat", subsetValue="Low Income") {
  allVars <- c("mrtg.crdt","consum.crdt","welfare","unempl","tax")
  labels.a <- sapply(surveyData[, a], levels)
  labels.b <- sapply(surveyData[, b], levels)
  labels.a <- labels.a[-2]
  labels.b <- labels.b[-2]
  clusterVar <- surveyData$ResponseId
  Y=surveyData$scaledChoice
  X1=surveyData[,grep(paste0("^",a,"$"), colnames(surveyData))]
  X2=surveyData[,grep(paste0("^",b,"$"), colnames(surveyData))]
  Z =data.frame(surveyData[,is.element (colnames(surveyData), allVars[intersect (which(allVars != b), which (allVars != a))])])
  dataMat <- data.frame (Y,X1,X2,Z,clusterVar)
  model <- lm(Y ~ X1*X2 + ., data=subset (dataMat[,-grep("clusterVar",colnames(dataMat))], surveyData[,grep(subsetVar, colnames(surveyData))] == subsetValue))
  cl.vcov <- vcovCL (model, cluster=subset (dataMat[,grep("clusterVar",colnames(dataMat))], surveyData[,grep(subsetVar, colnames(surveyData))] == subsetValue))
  scans <- rmvnorm (10000, mean=coef(model), sigma=cl.vcov)
  lolo <- scans %*% c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
  lohi <- scans %*% c(1,0,0,0,1,0,0,0,0,0,0,0,0,0,0)
  hilo <- scans %*% c(1,0,1,0,0,0,0,0,0,0,0,0,0,0,0)
  hihi <- scans %*% c(1,0,1,0,1,0,0,0,0,0,0,0,0,0,1)
  loDif <- hilo - lolo
  hiDif <- hihi - lohi
  DifnDif <- hiDif - loDif
  output <- list(mean(DifnDif), sd(DifnDif), mean(DifnDif)/sd(DifnDif))
  names (output) <- c("Coefficient", "SE", "t_stat")
  return (output)
}


# DifnDif3bis produces a diff-in-diff coefficient, with attendant standard error, based on vcov matrix, not simulations
# estimates for full sample (does not allow subsetting!)
DifnDif3bis <- function (a="welfare", b="consum.crdt") {
  allVars <- c("mrtg.crdt","consum.crdt","welfare","unempl","tax")
  labels.a <- levels(surveyData[, a])
  labels.b <- levels(surveyData[, b])
  labels.a <- labels.a[-c(2:(length(labels.a)-1))]
  labels.b <- labels.b[-c(2:(length(labels.b)-1))]
  clusterVar <- surveyData$ResponseId
  Y=surveyData$forcedChoice
  X1=surveyData[,grep(paste0("^",a,"$"), colnames(surveyData))]
  X2=surveyData[,grep(paste0("^",b,"$"), colnames(surveyData))]
  Z =data.frame(surveyData[,is.element (colnames(surveyData), allVars[intersect (which(allVars != b), which (allVars != a))])])
  dataMat <- data.frame (Y,X1,X2,Z,clusterVar)
  model <- lm(Y ~ X1*X2 + ., data= dataMat[,-grep("clusterVar",colnames(dataMat))])
  cl.vcov <- vcovCL (model, cluster= dataMat[,grep("clusterVar",colnames(dataMat))])
  DifnDif <- coef(model)[length(coef(model))]   
  se.DifnDif <- sqrt (cl.vcov[length(coef(model)),length(coef(model))])
  output <- list(DifnDif, se.DifnDif, DifnDif/se.DifnDif)
  names (output) <- c("Coefficient", "SE", "t_stat")
  return (output)
}


# DifnDif3bis produces a diff-in-diff coefficient, with attendant standard error, based on vcov matrix, not simulations
DifnDif3bis_sub <- function (a="tax", b="consum.crdt", subsetVar="income.cat", subsetValue="High Income") {
   allVars <- c("mrtg.crdt","consum.crdt","welfare","unempl","tax")
   labels.a <- levels(surveyData[, a])
   labels.b <- levels(surveyData[, b])
   labels.a <- labels.a[-c(2:(length(labels.a)-1))]
   labels.b <- labels.b[-c(2:(length(labels.b)-1))]
   clusterVar <- surveyData$ResponseId
   Y=surveyData$forcedChoice
   X1=surveyData[,grep(paste0("^",a,"$"), colnames(surveyData))]
   X2=surveyData[,grep(paste0("^",b,"$"), colnames(surveyData))]
   Z =data.frame(surveyData[,is.element (colnames(surveyData), allVars[intersect (which(allVars != b), which (allVars != a))])])
   dataMat <- data.frame (Y,X1,X2,Z,clusterVar)
   model <- lm(Y ~ X1*X2 + ., data=subset (dataMat[,-grep("clusterVar",colnames(dataMat))], surveyData[,grep(subsetVar, colnames(surveyData))] == subsetValue))
   ## commented out for the time being because it threw an error
   # cl.vcov <- vcovCL (model, cluster=as.numeric(subset (dataMat[,grep("clusterVar",colnames(dataMat))], surveyData[,grep(subsetVar, colnames(surveyData))] == subsetValue)))
   cl.vcov <- vcovCL (model, cluster=subset (dataMat[,grep("clusterVar",colnames(dataMat))], surveyData[,grep(subsetVar, colnames(surveyData))] == subsetValue))
   DifnDif <- coef(model)[length(coef(model))]   
   se.DifnDif <- sqrt (cl.vcov[length(coef(model)),length(coef(model))])
   output <- list(DifnDif, se.DifnDif, DifnDif/se.DifnDif)
   names (output) <- c("Coefficient", "SE", "t_stat")
   return (output)
}
